一元线性相关与回归

pearson相关系数

data1=read.csv("test1.csv",encoding = 'UTF-8')
fm=lm(prec~lati,data=data1)
x=fm$model[,1]
y=fm$model[,2]
fm$model
ABCDEFGHIJ0123456789
 
 
prec
<dbl>
lati
<dbl>
148.2540.5
2193.7236.6
3413.9435.5
4358.6037.5
5615.0435.4
6752.4233.8
7435.4335.6
8238.5536.6
987.8539.8
10316.0036.1

cor(x,y)
## [1] -0.9041104
cor.test(x,y)
## 
##  Pearson's product-moment correlation
## 
## data:  x and y
## t = -15.11, df = 51, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.9437700 -0.8387982
## sample estimates:
##        cor 
## -0.9041104

线性回归模型

fm$coefficients
## (Intercept)        lati 
##  3392.17587   -82.09156

所以纬度与降水量线性回归模型为 y=3392.1758782.09156x

多元线性相关和回归

多元线性回归模型

fm1=lm(prec~lati+long+alti+evap,data=data1)
fm1$coefficients
##   (Intercept)          lati          long          alti          evap 
## 1333.42267932  -55.43028189   10.86386521    0.04060350   -0.06360033

标准化偏回归系数

coef.sd<-function(fm){
  b=fm$coefficients
  si=apply(fm$model,2,sd)
  bi=b[-1]*si[-1]/si[1];bi
}
coef.sd(fm1)
##       lati       long       alti       evap 
## -0.6104780  0.1652505  0.1150597 -0.1655769

回归系数检验

summary(fm1)
## 
## Call:
## lm(formula = prec ~ lati + long + alti + evap, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -191.98  -58.43  -10.53   50.50  176.12 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1333.42268 1043.47232   1.278   0.2074    
## lati         -55.43028   12.57465  -4.408 5.85e-05 ***
## long          10.86387    7.11629   1.527   0.1334    
## alti           0.04060    0.02256   1.800   0.0781 .  
## evap          -0.06360    0.04857  -1.309   0.1967    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 88.23 on 48 degrees of freedom
## Multiple R-squared:  0.8444, Adjusted R-squared:  0.8314 
## F-statistic: 65.11 on 4 and 48 DF,  p-value: < 2.2e-16

复相关

R=(y^iy¯)2(yiy¯)2

R2=

summary(fm1)$r.sq
## [1] 0.8443856

变量选择

变量选择准则

library(leaps)
varselect=regsubsets(prec~lati+long+alti+evap,data=data1)
result=summary(varselect)
data.frame(result$outmat,RSS=result$rss,R2=result$rsq)
ABCDEFGHIJ0123456789
 
 
lati
<chr>
long
<chr>
alti
<chr>
evap
<chr>
RSS
<dbl>
R2
<dbl>
1 ( 1 )*438411.40.8174156
2 ( 1 )**405246.70.8312276
3 ( 1 )***386997.80.8388277
4 ( 1 )****373652.50.8443856
data.frame(result$outmat,adjR2=result$adjr2,Cp=result$cp,BIC=result$bic)
ABCDEFGHIJ0123456789
 
 
lati
<chr>
long
<chr>
alti
<chr>
evap
<chr>
adjR2
<dbl>
Cp
<dbl>
BIC
<dbl>
1 ( 1 )*0.81383557.319027-82.18817
2 ( 1 )**0.82447675.058643-82.38694
3 ( 1 )***0.82896004.714355-80.85874
4 ( 1 )****0.83141775.000000-78.74836
#调整决定系数越大越好,Cp和对于的变量个数p越接近越好,BIC越小越好(是残差平方和的调整修正)

逐步回归分析

fm.step=step(fm1,direction = "forward")#向前引入法
## Start:  AIC=479.62
## prec ~ lati + long + alti + evap
fm.step=step(fm1,direction = "backward")
## Start:  AIC=479.62
## prec ~ lati + long + alti + evap
## 
##        Df Sum of Sq    RSS    AIC
## - evap  1     13345 386998 479.48
## <none>              373653 479.62
## - long  1     18142 391795 480.13
## - alti  1     25226 398878 481.08
## - lati  1    151262 524914 495.64
## 
## Step:  AIC=479.48
## prec ~ lati + long + alti
## 
##        Df Sum of Sq    RSS    AIC
## <none>              386998 479.48
## - long  1     26976 413974 481.05
## - alti  1     41219 428217 482.85
## - lati  1    369472 756470 513.00
fm.step=step(fm1,direction = "both")#逐步筛选法
## Start:  AIC=479.62
## prec ~ lati + long + alti + evap
## 
##        Df Sum of Sq    RSS    AIC
## - evap  1     13345 386998 479.48
## <none>              373653 479.62
## - long  1     18142 391795 480.13
## - alti  1     25226 398878 481.08
## - lati  1    151262 524914 495.64
## 
## Step:  AIC=479.48
## prec ~ lati + long + alti
## 
##        Df Sum of Sq    RSS    AIC
## <none>              386998 479.48
## + evap  1     13345 373653 479.62
## - long  1     26976 413974 481.05
## - alti  1     41219 428217 482.85
## - lati  1    369472 756470 513.00
summary(fm.step)
## 
## Call:
## lm(formula = prec ~ lati + long + alti, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -209.90  -53.49  -13.85   49.06  181.67 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1386.96962 1050.24347   1.321   0.1928    
## lati         -66.07773    9.66096  -6.840 1.18e-08 ***
## long          12.92067    6.99115   1.848   0.0706 .  
## alti           0.04949    0.02167   2.285   0.0267 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 88.87 on 49 degrees of freedom
## Multiple R-squared:  0.8388, Adjusted R-squared:  0.829 
## F-statistic: 85.01 on 3 and 49 DF,  p-value: < 2.2e-16